## ----------------------------------------------------------------------
##
## Figure 8: Sequential g-estimation 
##
## ----------------------------------------------------------------------

## environment setting
Sys.setenv(LANGUAGE="en")
gc();gc()
rm(list = ls())
options(scipen = 999)   ## Disable scientific notation

## ----------------------------------------------------------------------
##
## Initial settings
##
## ----------------------------------------------------------------------
## Set directory
## ----------------------------------------------------------------------
root_dir = "YOUR_DIRECTORY/Replication_Folder"
## Load packages and functions
source(file.path(root_dir, "2_Code/1_PackagesFunctions.R"))

## ----------------------------------------------------------------------
## Data directory
data_dir = root_dir	%>%	file.path("1_Data")	%>%	dir_create()
## Output directory
output_dir = root_dir	%>%	file.path("3_Result")	%>%	dir_create()
figure_dir = root_dir	%>%	file.path("4_Figures")	%>%	dir_create()
## Working directory
working_dir = root_dir	%>%	file.path("5_Working")	%>%	dir_create()
## ----------------------------------------------------------------------


## ----------------------------------------------------------------------
##
## Data and model preparation
##
## ----------------------------------------------------------------------
## ----------------------------------------------------------------------
## Data and model preparation
## ----------------------------------------------------------------------
## Load census data w/ raid damage variable
tokyo_census_tbl = data_dir	%>%
  file.path("census_data4.csv") %>%
  read_csv(col_types = cols()) 
## ----------------------------------------------------------------------


## ----------------------------------------------------------------------
## Variables
## ----------------------------------------------------------------------
## Dependent variables (t)
## ----------------------------------------------------------------------
depvar_vec = c(
  "lnr_unemp", "lnrm_unemp", "lnrf_unemp",	## Unemployment rate
  "lnr_proexe", "lnave_lvlength", "lnave_eduyr", "lnrmale0004"	## Prop boys
)
## Lagged dependent variables (t-1)
lagged_depvar_vec = str_c(depvar_vec, "_lagged")
## DV labels for figures
outcome_lbl = c("ln % Unemp", "ln % Unemp (Male)", "ln % Unemp (Female)", "ln % Prof Exec", "ln Ave Res Yrs", "ln Ave Edu Yrs", "ln % Male <6 y/o")
outcome_lbl = outcome_lbl	%>%
  as.data.frame	%>%
  mutate(depvar = depvar_vec)
names(outcome_lbl)[1] = "outcome_lbl"
## ----------------------------------------------------------------------
## Treatment and mediator variables
## ----------------------------------------------------------------------
## --- Treatment
treatment = "ratio_damage"	## New treatment variable Dec 2018
## --- New population
mediator = "lnr_newpop"		## logged new population ratio
## --- 6F+ apartment
mediator2 = "lnr_kyodojutaku6up"
## ----------------------------------------------------------------------
## X: Pretreatment covariates
## ----------------------------------------------------------------------
distance_termz_base = c("palace_dist_ln", "minTargetDistance_ln")
## Polynomial
polynomial_degree = 5
distance_termz_poly = polynomial_varname(distance_termz_base, degree = polynomial_degree)
pretreatment_covariates = c(
  ## Residential ratio and geographical area
  "ratio_residential", "poly_area_ln", "n_neighbor",
  ## Prewar population density
  "PopDensity_1939_km2_ln",
  ## Distance terms
  distance_termz_poly,
  ## Terrain terms
  "mean_elevation_ln", "mean_slope_ln", "river_distance_ln",
  ## Railway terms
  "railway_length_ln", "n_stations", "SpLag_railway_length_ln", "SpLag_n_stations"
)
## ----------------------------------------------------------------------
## Z: Intermediate confounders
## ----------------------------------------------------------------------
z_base = c(
  "r_popyoung", "r_popold", "ave_age", "r_popforeign",
  "r_singlehh", "r_myhouse", "r_kyodojutaku",
  "r_worker3sec", "r5pop", "r5r_worker2sec", "PopDensity"
)
## Logged Z variables
z_base_ln = str_c(z_base, "_ln")
z_lagged_ln = str_c(z_base_ln, "_lagged")	## these are included in the dmediation function
## ----------------------------------------------------------------------
## Spatial polynomial and fixed effects
## ----------------------------------------------------------------------
fixed_effect = "as.factor(prewar_district)"	## 35-district FE
lonlat_base = c("z_lon", "z_lat")
polynomialz = c(
  polynomial_varname(lonlat_base, degree = polynomial_degree),
  "I(z_lon*z_lat)", "I(z_lon^2*z_lat)", "I(z_lon*z_lat^2)","I(z_lon^2*z_lat^2)",
  "I(z_lon^3*z_lat)", "I(z_lon*z_lat^3)", "I(z_lon^4*z_lat)", "I(z_lon*z_lat^4)", "I(z_lon^3*z_lat^2)", "I(z_lon^2*z_lat^3)"
)
## ----------------------------------------------------------------------


## ----------------------------------------------------------------------
## Prepare data
## ----------------------------------------------------------------------
tokyo4gest_20002010 = tokyo_census_tbl	%>%	filter(year == 2000|year == 2010)	%>%
  drop_na(all_of(treatment))	%>%
  ## compute logged Z
  mutate_at(
    vars(all_of(z_base)), list(ln = ~log(. + 0.01))
  )	%>%
  ## grouping
  group_by(key_code)  %>%
  ## compute lagged Y and Z
  mutate_at(
    vars(all_of(depvar_vec), all_of(z_base_ln)), list(lagged = ~lag(.))
  )	%>% ungroup %>%
  ## Standardize for polynomial terms
  mutate_at(
    vars(all_of(distance_termz_base)), list(scale2)
  )	%>%
  mutate_at(
    vars(all_of(distance_termz_base)), list(as.numeric)
  )	%>%
  ## drop raw and simply logged (but not lagged) Z variables
  select(-all_of(z_base), -all_of(z_base_ln))

tokyo4gest_19952000 = tokyo4gest_20002010


## ----------------------------------------------------------------------
##
## Sequential g-estimation
##
## ----------------------------------------------------------------------
## Bootstrap settings
n_boot = 1000*5
boot_loop = 1:n_boot
## ----------------------------------------------------------------------
## Output matrix master
master_matrix = matrix(NA, nrow = n_boot, ncol = 2) %>% as.data.frame
names(master_matrix) = c("outcome", "boot_est")
## ----------------------------------------------------------------------


## ----------------------------------------------------------------------
## Loop over mediator values
## mediator = "lnr_newpop"
## ----------------------------------------------------------------------
set.seed(1234)
for (i in 1:length(depvar_vec)) {
  ## ----------------------------------------------------------------------
  ## Set models
  tmp_depvar = depvar_vec[i]      ## set DV
  tmp_lagged_depvar = lagged_depvar_vec[i]  ## set mediator (lagged DV)
  ##  --- ATE model simply conditioning on pretreatment covariates
  mod_ATE = chr2fml(tmp_depvar, idv_list = list(treatment, pretreatment_covariates, fixed_effect, polynomialz))
  ##  --- naive ACDE model simply conditioning on pretreatment covariates and mediator
  mod_withMwoZ = chr2fml(tmp_depvar, idv_list = list(treatment, mediator, pretreatment_covariates, fixed_effect, polynomialz))
  ##  --- naive ACDE model fully conditioning on pretreatment covariates, mediator, and intermediate confounders
  mod_wMandZ = chr2fml(tmp_depvar, idv_list = list(treatment, mediator, pretreatment_covariates, tmp_lagged_depvar, z_lagged_ln, fixed_effect, polynomialz))
  ##  ---- ACDE model
  acde_mod = update(mod_ATE, demediated_outcome ~ .)
  ## ----------------------------------------------------------------------
  
  ## ----------------------------------------------------------------------
  ## Prepare data obj for estimation
  tokyo4gest = tokyo4gest_19952000    ## 1995 and 2000 datasets -> replaced with 2000-2010 data, see above
  if (str_detect(tmp_depvar, "lnave")) tokyo4gest = tokyo4gest_20002010   ## replace data: 2000 and 2010 datasets
  tmp_reg_data = get_all_vars(update(mod_wMandZ, . ~ . + year + pop), data = tokyo4gest) %>% drop_na
  ## ----------------------------------------------------------------------
  ## Output matrix
  tmp_ACDE_matrix = master_matrix
  tmp_ACDE_matrix[,1] = depvar_vec[i]
  tmp_ATE_matrix = tmp_woZ_matrix = tmp_naive_matrix = tmp_ACDE_matrix
  ## ----------------------------------------------------------------------
  ## Bootstrap
  for (b in boot_loop) {
    ## ----------------------------------------------------------------------
    ## Print progress
    cat(str_c("Outcome: ", i, "/", length(depvar_vec), " bootstrap ", b, "/", n_boot, " done. \r"))
    ## ----------------------------------------------------------------------
    ## Bootstrap sample
    boot_sample = sample_frac(tmp_reg_data, size = 1, replace = TRUE)
    ## ----------------------------------------------------------------------
    ## Set mediator value (recentering)
    boot_sample[,mediator] = boot_sample[,mediator] - mean(boot_sample[,mediator])
    ## ----------------------------------------------------------------------
    ## Run models
    est_ATE = speedlm(mod_ATE, data = boot_sample, weights = boot_sample$pop)
    est_withMwoZ = speedlm(mod_withMwoZ, data = boot_sample, weights = boot_sample$pop)
    est_withMandZ = speedlm(mod_wMandZ, data = boot_sample, weights = boot_sample$pop)  ## <- first stage model
    ## ----------------------------------------------------------------------
    ## Demediate the outcome
    boot_sample$demediated_outcome = boot_sample[,tmp_depvar] - coef(est_withMandZ)[mediator]*boot_sample[,mediator]
    ## Second-stage estimate
    direct_est = speedlm(acde_mod, data = boot_sample, weights = boot_sample$pop)
    ## ----------------------------------------------------------------------
    ## Store results (current round)
    tmp_ATE_matrix[b,2] = coef(est_ATE)[treatment]
    tmp_woZ_matrix[b,2] = coef(est_withMwoZ)[treatment]
    tmp_naive_matrix[b,2] = coef(est_withMandZ)[treatment]
    tmp_ACDE_matrix[b,2] = coef(direct_est)[treatment]
    ## ----------------------------------------------------------------------
  }
  ## ----------------------------------------------------------------------
  ## Combine bootstrap results (all outcomes)
  if (i > 1) {
    ATE_matrix = bind_rows(ATE_matrix, tmp_ATE_matrix)
    woZ_matrix = bind_rows(woZ_matrix, tmp_woZ_matrix)
    naive_matrix = bind_rows(naive_matrix, tmp_naive_matrix)
    ACDE_matrix = bind_rows(ACDE_matrix, tmp_ACDE_matrix)
  }   else    {
    ATE_matrix = tmp_ATE_matrix
    woZ_matrix = tmp_woZ_matrix
    naive_matrix = tmp_naive_matrix
    ACDE_matrix = tmp_ACDE_matrix
  }
  ## ----------------------------------------------------------------------
}

## ----------------------------------------------------------------------
## Add estimator column
ATE_matrix$estimator = "ATE"
woZ_matrix$estimator = "NaiveACDE_withoutZ"
naive_matrix$estimator = "NaiveACDE_withZ"
ACDE_matrix$estimator = "ACDE_gest"
## ----------------------------------------------------------------------
## Combine
boot_fullresult = ATE_matrix	%>%
  as_tibble	%>%
  bind_rows(ACDE_matrix)	%>%
  bind_rows(woZ_matrix)	%>%
  bind_rows(naive_matrix)	%>%
  arrange(outcome, estimator)	%>%
  mutate(mediator = mediator)
## ----------------------------------------------------------------------
## Cancel comment-out to save the bootstrap results
## Bootstrap output directory
# boot_dir_sub = file.path(output_dir, str_c(mediator, "_boot_n_", n_boot, "_censusmediation")) %>%	dir_create()
## ----------------------------------------------------------------------
## Save result
# file_name = str_c("bootstrap", n_boot, "treatment", treatment, "mediator", mediator, "result.csv", sep = "_")
# write_csv(boot_fullresult, path = file.path(boot_dir_sub, file_name))
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
## Loop over mediator values
## mediator = "lnr_kyodojutaku6up"
## ----------------------------------------------------------------------
set.seed(1234)
for (i in 1:length(depvar_vec)) {
  ## ----------------------------------------------------------------------
  ## Set models
  tmp_depvar = depvar_vec[i]      ## set DV
  tmp_lagged_depvar = lagged_depvar_vec[i]  ## set mediator (lagged DV)
  ##  --- ATE model simply conditioning on pretreatment covariates
  mod_ATE = chr2fml(tmp_depvar, idv_list = list(treatment, pretreatment_covariates, fixed_effect, polynomialz))
  ##  --- naive ACDE model simply conditioning on pretreatment covariates and mediator
  mod_withMwoZ = chr2fml(tmp_depvar, idv_list = list(treatment, mediator2, pretreatment_covariates, fixed_effect, polynomialz))
  ##  --- naive ACDE model fully conditioning on pretreatment covariates, mediator, and intermediate confounders
  mod_wMandZ = chr2fml(tmp_depvar, idv_list = list(treatment, mediator2, pretreatment_covariates, tmp_lagged_depvar, z_lagged_ln, fixed_effect, polynomialz))
  ##  ---- ACDE model
  acde_mod = update(mod_ATE, demediated_outcome ~ .)
  ## ----------------------------------------------------------------------
  
  ## ----------------------------------------------------------------------
  ## Prepare data obj for estimation
  tokyo4gest = tokyo4gest_19952000    ## 1995 and 2000 datasets -> replaced with 2000-2010 data, see above
  if (str_detect(tmp_depvar, "lnave")) tokyo4gest = tokyo4gest_20002010   ## replace data: 2000 and 2010 datasets
  tmp_reg_data = get_all_vars(update(mod_wMandZ, . ~ . + year + pop), data = tokyo4gest) %>% drop_na
  ## ----------------------------------------------------------------------
  ## Output matrix
  tmp_ACDE_matrix = master_matrix
  tmp_ACDE_matrix[,1] = depvar_vec[i]
  tmp_ATE_matrix = tmp_woZ_matrix = tmp_naive_matrix = tmp_ACDE_matrix
  ## ----------------------------------------------------------------------
  ## Bootstrap
  for (b in boot_loop) {
    ## ----------------------------------------------------------------------
    ## Print progress
    cat(str_c("Outcome: ", i, "/", length(depvar_vec), " bootstrap ", b, "/", n_boot, " done. \r"))
    ## ----------------------------------------------------------------------
    ## Bootstrap sample
    boot_sample = sample_frac(tmp_reg_data, size = 1, replace = TRUE)
    ## ----------------------------------------------------------------------
    ## Set mediator value (recentering)
    boot_sample[,mediator2] = boot_sample[,mediator2] - mean(boot_sample[,mediator2])
    ## ----------------------------------------------------------------------
    ## Run models
    est_ATE = speedlm(mod_ATE, data = boot_sample, weights = boot_sample$pop)
    est_withMwoZ = speedlm(mod_withMwoZ, data = boot_sample, weights = boot_sample$pop)
    est_withMandZ = speedlm(mod_wMandZ, data = boot_sample, weights = boot_sample$pop)  ## <- first stage model
    ## ----------------------------------------------------------------------
    ## Demediate the outcome
    boot_sample$demediated_outcome = boot_sample[,tmp_depvar] - coef(est_withMandZ)[mediator2]*boot_sample[,mediator2]
    ## Second-stage estimate
    direct_est = speedlm(acde_mod, data = boot_sample, weights = boot_sample$pop)
    ## ----------------------------------------------------------------------
    ## Store results (current round)
    tmp_ATE_matrix[b,2] = coef(est_ATE)[treatment]
    tmp_woZ_matrix[b,2] = coef(est_withMwoZ)[treatment]
    tmp_naive_matrix[b,2] = coef(est_withMandZ)[treatment]
    tmp_ACDE_matrix[b,2] = coef(direct_est)[treatment]
    ## ----------------------------------------------------------------------
  }
  ## ----------------------------------------------------------------------
  ## Combine bootstrap results (all outcomes)
  if (i > 1) {
    ATE_matrix = bind_rows(ATE_matrix, tmp_ATE_matrix)
    woZ_matrix = bind_rows(woZ_matrix, tmp_woZ_matrix)
    naive_matrix = bind_rows(naive_matrix, tmp_naive_matrix)
    ACDE_matrix = bind_rows(ACDE_matrix, tmp_ACDE_matrix)
  }   else    {
    ATE_matrix = tmp_ATE_matrix
    woZ_matrix = tmp_woZ_matrix
    naive_matrix = tmp_naive_matrix
    ACDE_matrix = tmp_ACDE_matrix
  }
  ## ----------------------------------------------------------------------
}

## ----------------------------------------------------------------------
## Add estimator column
ATE_matrix$estimator = "ATE"
woZ_matrix$estimator = "NaiveACDE_withoutZ"
naive_matrix$estimator = "NaiveACDE_withZ"
ACDE_matrix$estimator = "ACDE_gest"
## ----------------------------------------------------------------------
## Combine
boot_fullresult2 = ATE_matrix	%>%
  as_tibble	%>%
  bind_rows(ACDE_matrix)	%>%
  bind_rows(woZ_matrix)	%>%
  bind_rows(naive_matrix)	%>%
  arrange(outcome, estimator)	%>%
  mutate(mediator = mediator2)
## ----------------------------------------------------------------------
## Cancel comment-out to save the bootstrap results
## Bootstrap output directory
# boot_dir_sub2 = file.path(output_dir, str_c(mediator2, "_boot_n_", n_boot, "_censusmediation")) %>%	dir_create()
## ----------------------------------------------------------------------
## Save result
# file_name = str_c("bootstrap", n_boot, "treatment", treatment, "mediator", mediator2, "result.csv", sep = "_")
# write_csv(boot_fullresult2, path = file.path(boot_dir_sub2, file_name))
## ----------------------------------------------------------------------

## ----------------------------------------------------------------------
## ------------------------ Estimations end here ------------------------
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
## <<<---------------------<<< Plot estimates >>>--------------------->>>
## ----------------------------------------------------------------------

## ----------------------------------------------------------------------
##
## Plot: Summary Comparison Plot
## mediator = lnr_newpop
##
## ----------------------------------------------------------------------
## Cancel comment-out to read the saved estimates
# boot_fullresult_path = list.files(boot_dir_sub, pattern = ".csv", full.names = TRUE)
# boot_fullresult = read.csv(boot_fullresult_path)
## ----------------------------------------------------------------------
## Set alpha
alpha_level = 0.95
crit_valz = c((1-alpha_level)/2, 1-(1-alpha_level)/2)
## ----------------------------------------------------------------------
## Pull out outcome and estimator vectors
outcome_vec = boot_fullresult$outcome   %>% unique
estimator_vec = boot_fullresult$estimator   %>% unique
n_estimatorz = estimator_vec    %>% length
n_outcomez = outcome_vec    %>% length
## ----------------------------------------------------------------------
## Colors
# colz_vec = c("blue2", "firebrick2", "gray65", "magenta2")
# alpha_vec = c(0.6, 0.15, 1)
colz_vec = c("lightsteelblue4", "lightsteelblue4", "gray65", "magenta2")
alpha_vec = c(0.6, 0.3, 1)
## ----------------------------------------------------------------------
## Convert for comparison
## ----------------------------------------------------------------------
## Compute delta = ATE - ACDE
plot_summary = boot_fullresult  %>%
  mutate(
    boot_id = rep(
      seq(1, (nrow(boot_fullresult)/n_outcomez/n_estimatorz), by = 1
      ),
      n_outcomez*n_estimatorz)
  )   %>%
  ## convert to wide-format
  spread(key = estimator, value = boot_est)   %>%
  mutate(
    delta_unbiased = ATE - ACDE_gest
  )
## ----------------------------------------------------------------------
## Bootstrap means and CIs
boot_meanz = plot_summary   %>%
  select(-boot_id)    %>%
  ## convert to long-format
  gather(
    key = estimator, value = boot_est, -(outcome:mediator)
  )   %>%
  select(-mediator)   %>%
  group_by(outcome, estimator)    %>%
  summarize(
    boot_mean = mean(boot_est),
    ci_lower = boot_mean - 1.96*sd(boot_est),
    ci_upper = boot_mean + 1.96*sd(boot_est)
  )   %>% ungroup
## ----------------------------------------------------------------------
## Combine means and CIs, then set colors
boot_summary = boot_meanz   %>%
  # bind_cols(boot_ciz)  %>%
  mutate(
    point_col = ifelse(boot_mean>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col),
    estimator_order = rep(c(3, 2, 1, 4, 5), n_outcomez)
  )   %>%
  arrange(estimator_order)
boot_summary = outcome_lbl  %>%
  left_join(boot_summary, by = c("depvar" = "outcome"))   %>%
  mutate(row_id = row_number())
## ----------------------------------------------------------------------
## Figure parameters
fig_width = 4.85
base_cex = .85
fig_mrgn = c(8.95, 2.4, 1.65, .5) + 0.1    ## figure margin: bottom, left, top, right
y_range = c(-0.12, 0.12)
# y_tick = seq(y_range[1], y_range[2], by = 0.05)
y_tick = seq(-0.1, y_range[2], by = 0.05)
y_tick_sub = seq(-1, 1, by = 0.01)
separation_linez = c(seq(5, 30, by = 5) + 0.5, 40)
separation_linez_sub = seq(3, 33, by = 5) + 0.5
rect_width = 0.5
point_est_lwd = 1.5
## ----------------------------------------------------------------------
## Outcome label
outcome_pos = seq(3, 33, by = 5)
outcome_pos_1 = c(1, 3, 5, 7)
outcome_pos_2 = c(2, 4, 6)
## ----------------------------------------------------------------------
## Estimator and outcome labels
estimator_lbl = c("ATE-ACDE", "ATE (conditioning on X)", "ACDE (g-estimator)", "Naive model w/ M", "Naive model w/ M & Z")
outcome_lbl_vec = boot_summary  %>%
  select(outcome_lbl) %>%
  unique  %>% as.matrix   %>% as.vector
## ----------------------------------------------------------------------
## Plot
pdf(file.path(figure_dir, str_c("fig_8a.pdf")), width = plt_ratio(fig_width), height = fig_width)
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(boot_summary)+0.75))
plot(x = boot_summary$row_id, y = boot_summary$boot_mean, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)
## Background
for (i in 1:length(separation_linez_sub)) {
  rect(xleft = separation_linez_sub[i], xright = separation_linez[i], ytop = 10, ybottom = -10, col = adjustcolor("gray", alpha = 0.2), lwd = NA)
}
## Separation lines
abline(v = separation_linez)
abline(v = separation_linez_sub, lty = "dashed", lwd = .5)
## Rectangles
rect(xleft = boot_summary$row_id-rect_width/2, xright = boot_summary$row_id+rect_width/2, ybottom = boot_summary$ci_lower, ytop = boot_summary$ci_upper, col = boot_summary$point_col, border = "black", lwd = 0.5)
## Point estimates
# segments(x0 = boot_summary$row_id-0.32, x1 = boot_summary$row_id+0.32, y0 = boot_summary$boot_mean)
segments(x0 = boot_summary$row_id-0.2, x1 = boot_summary$row_id+0.2, y0 = boot_summary$boot_mean, lwd = point_est_lwd)
## ----------------------------------------------------------------------
## Horizontal axis
axis(1, line = 0, at = 1:nrow(boot_summary), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -1)
# axis(1, line = 0, at = separation_linez_sub, label = NA, tck = -1, lty = "dotted")
## Estimator label
colz = c(rep("black", 3), rep("gray60", 2))
mtext(estimator_lbl, side = 1, line = 0.5, at = 1:nrow(boot_summary), las = 2, cex = 0.8, col = colz)
## Outcome labels
outcome_prefix = c("DV: ", rep("", times = (length(outcome_pos_1)-1)))
mtext(str_c(outcome_prefix, outcome_lbl_vec[outcome_pos_1]), side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.8)
mtext(outcome_lbl_vec[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.8)
## ----------------------------------------------------------------------
## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)
mtext("Effect Estimates (95% bootstrap CIs)", side = 2, line = 1.65, cex = 0.8)
## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

dev.off()
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Plot: Summary Comparison Plot
## mediator = lnr_kyodojutaku6up
##
## ----------------------------------------------------------------------
## Cancel comment-out to read the saved estimates
# boot_fullresult2_path = list.files(boot_dir_sub2, pattern = ".csv", full.names = TRUE)
# boot_fullresult2 = read.csv(boot_fullresult2_path)
## ----------------------------------------------------------------------
## Set alpha
alpha_level = 0.95
crit_valz = c((1-alpha_level)/2, 1-(1-alpha_level)/2)
## ----------------------------------------------------------------------
## Pull out outcome and estimator vectors
outcome_vec = boot_fullresult2$outcome   %>% unique
estimator_vec = boot_fullresult2$estimator   %>% unique
n_estimatorz = estimator_vec    %>% length
n_outcomez = outcome_vec    %>% length
## ----------------------------------------------------------------------
## Colors
# colz_vec = c("blue2", "firebrick2", "gray65", "magenta2")
# alpha_vec = c(0.6, 0.15, 1)
colz_vec = c("lightsteelblue4", "lightsteelblue4", "gray65", "magenta2")
alpha_vec = c(0.6, 0.3, 1)
## ----------------------------------------------------------------------
## Convert for comparison
## ----------------------------------------------------------------------
## Compute delta = ATE - ACDE
plot_summary = boot_fullresult2  %>%
  mutate(
    boot_id = rep(
      seq(1, (nrow(boot_fullresult2)/n_outcomez/n_estimatorz), by = 1
      ),
      n_outcomez*n_estimatorz)
  )   %>%
  ## convert to wide-format
  spread(key = estimator, value = boot_est)   %>%
  mutate(
    delta_unbiased = ATE - ACDE_gest
  )
## ----------------------------------------------------------------------
## Bootstrap means and CIs
boot_meanz = plot_summary   %>%
  select(-boot_id)    %>%
  ## convert to long-format
  gather(
    key = estimator, value = boot_est, -(outcome:mediator)
  )   %>%
  select(-mediator)   %>%
  group_by(outcome, estimator)    %>%
  summarize(
    boot_mean = mean(boot_est),
    ci_lower = boot_mean - 1.96*sd(boot_est),
    ci_upper = boot_mean + 1.96*sd(boot_est)
  )   %>% ungroup
## ----------------------------------------------------------------------
## Combine means and CIs, then set colors
boot_summary = boot_meanz   %>%
  # bind_cols(boot_ciz)  %>%
  mutate(
    point_col = ifelse(boot_mean>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col),
    estimator_order = rep(c(3, 2, 1, 4, 5), n_outcomez)
  )   %>%
  arrange(estimator_order)
boot_summary = outcome_lbl  %>%
  left_join(boot_summary, by = c("depvar" = "outcome"))   %>%
  mutate(row_id = row_number())
## ----------------------------------------------------------------------
## Figure parameters
fig_width = 4.85
base_cex = .85
fig_mrgn = c(8.95, 2.4, 1.65, .5) + 0.1    ## figure margin: bottom, left, top, right
y_range = c(-0.12, 0.12)
# y_tick = seq(y_range[1], y_range[2], by = 0.05)
y_tick = seq(-0.1, y_range[2], by = 0.05)
y_tick_sub = seq(-1, 1, by = 0.01)
separation_linez = c(seq(5, 30, by = 5) + 0.5, 40)
separation_linez_sub = seq(3, 33, by = 5) + 0.5
rect_width = 0.5
point_est_lwd = 1.5
## ----------------------------------------------------------------------
## Outcome label
outcome_pos = seq(3, 33, by = 5)
outcome_pos_1 = c(1, 3, 5, 7)
outcome_pos_2 = c(2, 4, 6)
## ----------------------------------------------------------------------
## Estimator and outcome labels
estimator_lbl = c("ATE-ACDE", "ATE (conditioning on X)", "ACDE (g-estimator)", "Naive model w/ M", "Naive model w/ M & Z")
outcome_lbl_vec = boot_summary  %>%
  select(outcome_lbl) %>%
  unique  %>% as.matrix   %>% as.vector
## ----------------------------------------------------------------------
## Plot
pdf(file.path(figure_dir, str_c("fig_8b.pdf")), width = plt_ratio(fig_width), height = fig_width)
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(boot_summary)+0.75))
plot(x = boot_summary$row_id, y = boot_summary$boot_mean, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)
## Background
for (i in 1:length(separation_linez_sub)) {
  rect(xleft = separation_linez_sub[i], xright = separation_linez[i], ytop = 10, ybottom = -10, col = adjustcolor("gray", alpha = 0.2), lwd = NA)
}
## Separation lines
abline(v = separation_linez)
abline(v = separation_linez_sub, lty = "dashed", lwd = .5)
## Rectangles
rect(xleft = boot_summary$row_id-rect_width/2, xright = boot_summary$row_id+rect_width/2, ybottom = boot_summary$ci_lower, ytop = boot_summary$ci_upper, col = boot_summary$point_col, border = "black", lwd = 0.5)
## Point estimates
# segments(x0 = boot_summary$row_id-0.32, x1 = boot_summary$row_id+0.32, y0 = boot_summary$boot_mean)
segments(x0 = boot_summary$row_id-0.2, x1 = boot_summary$row_id+0.2, y0 = boot_summary$boot_mean, lwd = point_est_lwd)
## ----------------------------------------------------------------------
## Horizontal axis
axis(1, line = 0, at = 1:nrow(boot_summary), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -1)
# axis(1, line = 0, at = separation_linez_sub, label = NA, tck = -1, lty = "dotted")
## Estimator label
colz = c(rep("black", 3), rep("gray60", 2))
mtext(estimator_lbl, side = 1, line = 0.5, at = 1:nrow(boot_summary), las = 2, cex = 0.8, col = colz)
## Outcome labels
outcome_prefix = c("DV: ", rep("", times = (length(outcome_pos_1)-1)))
mtext(str_c(outcome_prefix, outcome_lbl_vec[outcome_pos_1]), side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.8)
mtext(outcome_lbl_vec[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.8)
## ----------------------------------------------------------------------
## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)
mtext("Effect Estimates (95% bootstrap CIs)", side = 2, line = 1.65, cex = 0.8)
## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

dev.off()
## ----------------------------------------------------------------------






## EOF